Martyna Stasiak id.156071
%matplotlib inline
import cv2
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
from numpy.lib.stride_tricks import as_strided
import PIL
from pandas import DataFrame
import pandas as pd
from IPython.display import display, HTML
from skimage.exposure import rescale_intensity
import json
import os
from itertools import product
import itertools
import random
pd.options.display.html.border = 0
pd.options.display.float_format = '{:,.2f}'.format
!wget -O lena_std.tif http://www.lenna.org/lena_std.tif
!wget -O arc.zip https://github.com/fchollet/ARC/archive/refs/heads/master.zip && unzip -q -o arc.zip
!wget -O chessboard.png https://github.com/opencv/opencv/raw/master/doc/pattern.png
!wget -O clevr.jpg https://cs.stanford.edu/people/jcjohns/clevr/teaser.jpg
!wget -O graf1.png https://github.com/opencv/opencv/raw/master/samples/data/graf1.png
!wget -O graf2.png https://github.com/opencv/opencv/raw/master/samples/data/graf3.png
'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file.
ARC is a dataset of tasks. A task should be understood as an element consisting of demonstrations of input–output pairs and a query asking, for a given input, what the correct output is. Together, they form something like popular intelligence tests where you must demonstrate understanding of some abstract concepts and transformations based on a demonstration and apply them to a query.
The following code loads data into a Task class object. This class contains the following fields:
Note: raster pixels contain numeric data ranging from 0-9, where specific values represent classes. The classes are disordered, that is, there is no < nor > operation.
class Task:
    """Empty attribute container; ``load_task`` attaches all task fields to it."""
def load_task(dir_name, task_file):
    """Load one ARC task JSON file into a ``Task`` object.

    Parameters
    ----------
    dir_name : str
        Dataset sub-directory, e.g. "training" or "evaluation".
    task_file : str
        Task file name, e.g. "73251a56.json".

    Returns a ``Task`` with demo/test grids (int32 arrays), their counts,
    and their shapes attached as attributes.
    """
    path = os.path.join("./ARC-AGI-master", "data", dir_name, task_file)
    with open(path, "rb") as fp:
        task = json.load(fp)

    def _to_arrays(pair):
        # Every demo/test entry is a dict with "input" and "output" grids.
        return np.array(pair["input"], np.int32), np.array(pair["output"], np.int32)

    obj = Task()
    obj.name = os.path.basename(task_file)
    obj.demo_count = len(task["train"])
    obj.test_count = len(task["test"])
    obj.demo_inputs, obj.demo_outputs = list(zip(*(_to_arrays(p) for p in task["train"])))
    obj.demo_in_sizes = [g.shape for g in obj.demo_inputs]
    obj.demo_out_sizes = [g.shape for g in obj.demo_outputs]
    obj.test_inputs, obj.test_outputs = list(zip(*(_to_arrays(p) for p in task["test"])))
    obj.test_in_sizes = [g.shape for g in obj.test_inputs]
    obj.test_out_sizes = [g.shape for g in obj.test_outputs]
    return obj
def imshow(img):
    """Display an image inline: clamp to [0, 255] uint8, convert OpenCV's
    BGR(A) channel order to RGB(A) when needed, and render via PIL."""
    img = img.clip(0, 255).astype("uint8")
    if img.ndim == 3:
        # OpenCV stores colour channels as BGR(A); PIL expects RGB(A).
        code = cv2.COLOR_BGRA2RGBA if img.shape[2] == 4 else cv2.COLOR_BGR2RGB
        img = cv2.cvtColor(img, code)
    display(PIL.Image.fromarray(img))
# CSS prepended to every table rendered by `h`: compact 18x18 px cells with
# thin solid black borders, so integer rasters display as a grid.
css = """
<style type="text/css">
table, td, table.dataframe, table.dataframe td {
border: 1px solid black; //border: double;
border-collapse: collapse;
border-style: solid;
border-spacing: 0px;
background-color: rgb(250,250,250);
width: 18px;
height: 18px;
text-align: center;
transform: scale(1.0);
margin: 2px;
}
</style>
"""
def h(s):
    """Render `s` (anything DataFrame accepts) as a compact borderless-index
    HTML table, styled by the global `css`."""
    return display(HTML(css + DataFrame(s).to_html(header=False, index=False)))
def h_color(a, cmap="gray", scale=2):
    """Display 2D array `a` as an image with `scale` figure-units per cell.

    Parameters
    ----------
    a : 2D array to display.
    cmap : matplotlib colormap name or object.
    scale : size multiplier for the figure.
    """
    # figsize is (width, height): width follows columns (shape[1]) and
    # height follows rows (shape[0]).  The original had these swapped,
    # which distorted non-square rasters.
    fig = plt.figure(figsize=[a.shape[1] * scale, a.shape[0] * scale])
    plt.tick_params(
        axis="both",
        which="both",
        bottom=False,
        top=False,
        labelbottom=False,
        labelleft=False,
        left=False,
        right=False,
    )
    plt.imshow(a, cmap=cmap)
    # Render now and release the figure; without this, looping over many key
    # points leaves >20 open figures and triggers matplotlib's
    # "figure.max_open_warning" RuntimeWarning (seen in the notebook output).
    plt.show()
    plt.close(fig)
# 10-colour palette for ARC class values 0-9 (index == class; class 0 -> black).
# Classes are categorical, so the colours are arbitrary but fixed.
cmap = ListedColormap(
    [
        "black",
        "tomato",
        "chocolate",
        "darkorange",
        "gold",
        "olive",
        "green",
        "deepskyblue",
        "blueviolet",
        "hotpink",
    ]
)
def h_grid(grid, scale=1):
    """Display an ARC raster `grid` using the 10-colour ARC palette `cmap`."""
    h_color(grid, cmap, scale)
An intensity domain or spatial domain 2D image is a 2D projection of a 3D scene, represented as a 2D matrix containing pixels. There are various equivalent notations such as RGB, CMYK, HSV, etc., in which each pixel is written in a different way (using different representations).
Consequently, each representation of a 3D scene describes the incident light on the camera matrix, and pixels serve as descriptors for the incident light on a given element.
As each pixel acts as a descriptor for a specific element in the scene, there is a natural correlation between neighboring pixels in the intensity domain, creating a cause-and-effect relationship among them. This interpretation forms the foundation for numerous pixel description methods based on both the pixel and its neighborhood representations, all designed for analyzing the content of the 3D scene.
We will refer to any operation that describes a given pixel or its relationship to its neighborhood as a descriptor. The term descriptor(pixel) is also used to denote the value returned by the descriptor operation. In essence, a descriptor encompasses a set of features that describe a given object, whether it's the entire image, individual pixels, or a function that generates these features.
By loading an image into memory in the form of a 2D raster, we obtain the first, one of the simpler descriptors. Each pixel is described with 3 values determining the "content" of blue, green and red colors.
# Load the test image as a 3-channel BGR raster (imread flag 1 = colour).
lena = cv2.imread("./lena_std.tif", 1)
print("Each pixel is described by 3 values", lena.shape, lena[10, 10])
imshow(lena)
Each pixel is described by 3 values (512, 512, 3) [113 131 226]
Another descriptor may be the form "grayscale", in which each pixel has an associated grayscale value. It is one value between 0-255.
# Grayscale descriptor: a single 0-255 value per pixel.
lena_gray = cv2.cvtColor(lena, cv2.COLOR_BGR2GRAY)
print("Each pixel is described by 1 value", lena_gray.shape, lena_gray[10, 10])
imshow(lena_gray)
Each pixel is described by 1 value (512, 512) 157
Having many descriptors for a given pixel, we can combine them or create new descriptors based on them. One of the simpler ways of combining descriptors is to append the values from all descriptors to form one longer vector.
Below is presented a descriptor that combines an image in BGR and Grayscale space, as a result describing each pixel with four values (Blue, Green, Red, Grayscale).
def simpliest_possible_descriptor(img):
    """Describe every pixel with four values (B, G, R, gray): the original
    BGR image with its grayscale conversion appended as an extra channel."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return np.concatenate([img, gray[..., np.newaxis]], -1)
# Demo of the concatenated (B, G, R, gray) descriptor on one pixel.
lena_spd = simpliest_possible_descriptor(lena)
print("Each pixel is described by 4 values", lena_spd.shape, lena_spd[10, 10])
Each pixel is described by 4 values (512, 512, 4) [113 131 226 157]
From the previously known (convolutional) image filtering methods, we can distinguish low-pass and high-pass filters. Below is an example of creating two descriptors composed of low pass and high pass filter values.
def high_pass_filter_descriptor(img):
    """Per-pixel 2-value high-pass descriptor of `img`: the Laplacian and a
    combined x/y Sobel response of its grayscale version (float32)."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    responses = [
        cv2.Laplacian(gray, cv2.CV_32F),
        cv2.Sobel(gray, cv2.CV_32F, 1, 1, ksize=3),
    ]
    return np.stack(responses, -1)
def low_pass_filter_descriptor(img):
    """Per-pixel 4-value low-pass descriptor of `img`: box blur, Gaussian
    blur, median blur, and bilateral filter of its grayscale version."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    smoothed = [
        cv2.blur(gray, (3, 3)),
        cv2.GaussianBlur(gray, (5, 5), 0),
        cv2.medianBlur(gray, 5),
        cv2.bilateralFilter(gray, 9, 75, 75),
    ]
    return np.stack(smoothed, -1)
# Demo: the 4-channel low-pass and 2-channel high-pass descriptors at one pixel.
lena_lpf = low_pass_filter_descriptor(lena)
print("Low pass filter descriptor:", lena_lpf.shape, lena_lpf[10, 10])
lena_hpf = high_pass_filter_descriptor(lena)
print("High pass filter descriptor:", lena_hpf.shape, lena_hpf[10, 10])
Low pass filter descriptor: (512, 512, 4) [157 157 157 157] High pass filter descriptor: (512, 512, 2) [-2. -4.]
Descriptors can be based on any operation. These can be both complex operations based on machine learning systems, as well as precise algorithms describing specific, previously known concepts.
In the example of an image (any image) in ARC below, the following method describes the neighborhood of a given pixel.
# Load one ARC task and show its first demo input next to its demo output.
p_73251a56 = load_task("training", "73251a56.json")
h_grid(
    np.concatenate([p_73251a56.demo_inputs[0], p_73251a56.demo_outputs[0]], 1),
    scale=0.5,
)
The method involves checking whether the adjacent pixel, located at the specified position, is the same as the central pixel.
def check_neighbour(img, pos, neighbourhood_size=3):
    """Return a boolean mask marking pixels whose neighbour at relative
    offset `pos` (row, col) has the same value as the pixel itself.

    `neighbourhood_size` must be odd, `pos` must be non-zero and must stay
    inside the neighbourhood.
    """
    # Parameter requirements (same checks as the original).
    assert neighbourhood_size % 2 == 1
    center = (neighbourhood_size - 1) // 2
    assert pos[0] != 0 or pos[1] != 0
    assert (0 <= center + pos[0] < neighbourhood_size) and (
        0 <= center + pos[1] < neighbourhood_size
    )
    # Correlation kernel: -1 at the centre, +1 at the selected neighbour,
    # so the response is neighbour - centre.
    kernel = np.zeros([neighbourhood_size, neighbourhood_size], np.float32)
    kernel[center, center] = -1
    kernel[center + pos[0], center + pos[1]] = 1
    # BORDER_ISOLATED: pixels outside the image are treated as zeros.
    response = cv2.filter2D(
        img.astype(np.float32),
        -1,
        kernel,
        borderType=cv2.BORDER_ISOLATED,
    )
    # Zero response <=> neighbour equals the centre pixel.
    return response == 0
# Masks of pixels equal to their right / lower-right / lower neighbour.
h_color(check_neighbour(p_73251a56.demo_outputs[0], (0, 1)).astype(np.uint8), scale=0.3)
h_color(check_neighbour(p_73251a56.demo_outputs[0], (1, 1)).astype(np.uint8), scale=0.3)
h_color(check_neighbour(p_73251a56.demo_outputs[0], (1, 0)).astype(np.uint8), scale=0.3)
For any ARC image, suggest a descriptor that will calculate the number of the same adjacent pixels as the center pixel.
Then find the pixels they will have:
Parameters:
from numpy.lib.stride_tricks import sliding_window_view
def neighbourhood_descriptor(img):
    """Count, for every pixel, how many of its 8 neighbours share its value.

    The image is zero-padded so border pixels also get a full 3x3 window;
    note that a border pixel of class 0 therefore also "matches" the
    padding (this reproduces the original notebook behaviour).

    Returns an int array with the same shape as `img`, values in 0..8.
    """
    # Pad by 1: each original pixel then owns exactly one 3x3 window.
    # (The original padded by 2 and trimmed the result; the outer padding
    # ring never entered any kept window, so padding by 1 is equivalent
    # and avoids the extra work.)
    padded = np.pad(img, 1, mode="constant", constant_values=0)
    windows = sliding_window_view(padded, (3, 3))
    # windows[i, j, 1, 1] is pixel (i, j) itself.
    centers = windows[:, :, 1, 1]
    # Count matches in each 3x3 window, then drop the centre's self-match.
    return (windows == centers[:, :, None, None]).sum(axis=(2, 3)) - 1
# Apply the descriptor to the first demo input and visualise where a pixel
# has exactly 4 / more than 4 identical neighbours.
demo_output = p_73251a56.demo_inputs[0]
demo_output_n = neighbourhood_descriptor(demo_output)
print("A pixel descriptor that specifies the number of identical neighbors:")
h(demo_output_n)
print("\nInput raster and descriptors found")
h_grid(demo_output, scale=0.3)
h_color(np.concatenate([demo_output_n == 4, demo_output_n > 4], 1), scale=0.4)
A pixel descriptor that specifies the number of identical neighbors:
| 1 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| 2 | 4 | 4 | 3 | 2 | 3 | 3 | 1 | 2 | 2 | 1 | 2 | 1 | 1 | 2 | 1 | 1 | 2 | 1 | 1 | 1 |
| 2 | 4 | 2 | 5 | 5 | 4 | 3 | 4 | 4 | 4 | 2 | 2 | 3 | 3 | 1 | 2 | 2 | 2 | 1 | 2 | 1 |
| 2 | 3 | 5 | 2 | 6 | 6 | 6 | 4 | 4 | 5 | 4 | 3 | 1 | 3 | 3 | 3 | 4 | 4 | 2 | 2 | 1 |
| 1 | 2 | 5 | 6 | 2 | 6 | 7 | 7 | 6 | 4 | 3 | 3 | 5 | 3 | 3 | 4 | 4 | 4 | 4 | 4 | 3 |
| 1 | 3 | 4 | 6 | 6 | 2 | 6 | 7 | 8 | 7 | 5 | 3 | 5 | 3 | 5 | 7 | 6 | 5 | 4 | 4 | 3 |
| 1 | 3 | 3 | 6 | 7 | 6 | 2 | 5 | 5 | 5 | 5 | 5 | 5 | 4 | 3 | 6 | 7 | 8 | 7 | 6 | 3 |
| 1 | 1 | 4 | 4 | 7 | 7 | 6 | 1 | 3 | 5 | 3 | 6 | 8 | 7 | 6 | 4 | 4 | 6 | 7 | 8 | 5 |
| 1 | 2 | 4 | 4 | 6 | 8 | 7 | 4 | 5 | 8 | 5 | 5 | 8 | 8 | 8 | 7 | 6 | 4 | 4 | 6 | 4 |
| 1 | 2 | 4 | 5 | 4 | 7 | 8 | 6 | 3 | 5 | 3 | 5 | 8 | 8 | 8 | 8 | 8 | 7 | 6 | 4 | 2 |
| 1 | 1 | 2 | 5 | 4 | 6 | 8 | 7 | 6 | 3 | 0 | 3 | 5 | 5 | 5 | 5 | 6 | 7 | 8 | 7 | 4 |
| 1 | 2 | 2 | 5 | 6 | 4 | 7 | 8 | 8 | 5 | 3 | 5 | 5 | 5 | 5 | 5 | 3 | 6 | 8 | 8 | 5 |
| 1 | 1 | 3 | 4 | 6 | 4 | 6 | 8 | 8 | 6 | 3 | 6 | 7 | 8 | 8 | 8 | 5 | 5 | 8 | 8 | 5 |
| 1 | 1 | 3 | 3 | 6 | 6 | 4 | 7 | 8 | 7 | 6 | 3 | 6 | 8 | 8 | 8 | 5 | 5 | 8 | 8 | 5 |
| 1 | 2 | 1 | 3 | 5 | 7 | 4 | 6 | 8 | 8 | 8 | 6 | 3 | 5 | 5 | 5 | 3 | 6 | 8 | 8 | 5 |
| 1 | 1 | 2 | 3 | 4 | 7 | 6 | 4 | 7 | 8 | 8 | 7 | 6 | 5 | 4 | 1 | 4 | 6 | 8 | 8 | 5 |
| 1 | 1 | 2 | 4 | 4 | 6 | 7 | 4 | 6 | 8 | 8 | 8 | 8 | 8 | 7 | 6 | 2 | 6 | 7 | 8 | 5 |
| 1 | 2 | 2 | 4 | 4 | 5 | 8 | 6 | 4 | 6 | 6 | 5 | 5 | 6 | 7 | 7 | 6 | 2 | 6 | 7 | 5 |
| 1 | 1 | 1 | 2 | 4 | 4 | 7 | 7 | 4 | 4 | 3 | 5 | 5 | 3 | 6 | 8 | 7 | 6 | 2 | 6 | 4 |
| 1 | 1 | 2 | 2 | 4 | 4 | 6 | 8 | 6 | 2 | 3 | 5 | 5 | 3 | 6 | 8 | 8 | 7 | 6 | 2 | 3 |
| 0 | 1 | 1 | 1 | 3 | 3 | 3 | 5 | 4 | 2 | 2 | 2 | 2 | 3 | 4 | 5 | 5 | 5 | 4 | 3 | 1 |
Input raster and descriptors found
The purpose of the descriptors is to succinctly describe the image in such a way that it is possible to identify essential characteristics that facilitate comparison with other images.
Global Descriptor: A global descriptor characterizes the entire image as a whole. Creating a good global descriptor is challenging because it must capture all crucial information while maintaining a compact size. This process is essentially a task of compression, and it is sensitive to changes in any part of the image, which can significantly impact the descriptor and lead to potential mismatches.
The entire image, represented by all its pixels, can serve as a global descriptor. However, comparing images pixel by pixel is highly inefficient and extremely sensitive to transformations such as scaling, rotation, or noise.
Local Descriptor: A local descriptor provides a concise representation of a point's surrounding neighborhood. It focuses on capturing the shape and appearance of the immediate area around the point, making it highly effective for matching purposes within that specific context.
Generating descriptors for every pixel can be computationally inefficient and time-consuming.
Moreover, images often include regions composed of identical or closely resembling pixels, making it unfeasible to match such regions across images.
The solution is to identify distinctive points within the image, known as key points. Key points represent unique or discernible locations in an image, making them easily identifiable across different images.
The process of comparing images based on key points is termed keypoint matching. This involves comparing the key points between both images (point-to-point) and determining the closest key point in the second image for each input key point.
There is also a variation where we discover multiple matches for each input key point.
The key point detection mechanism is closely related to the descriptor itself. For example, for Laplacian edge detection, it can be proposed to find pixels with extreme values (e.g. -4 and 4).
For an ARC image, let's define a descriptor that counts adjacent pixels different from the center pixel.
Note: The descriptor is based on the solution of Task 1.
def not_neighbourhood_descriptor(img):
    """Count, per pixel, how many of the 8 neighbours DIFFER from the centre
    pixel — the complement of `neighbourhood_descriptor` (zero padding at the
    border counts as a neighbour there too)."""
    return 8 - neighbourhood_descriptor(img)
# Demo of the complement descriptor on the first demo input.
p_73251a56_in0 = p_73251a56.demo_inputs[0]
p_73251a56_in0_nn = not_neighbourhood_descriptor(p_73251a56_in0)
print("Pixel descriptor specifying the number of different neighbors:")
h(p_73251a56_in0_nn)
Pixel descriptor specifying the number of different neighbors:
| 7 | 6 | 6 | 6 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 8 |
| 6 | 4 | 4 | 5 | 6 | 5 | 5 | 7 | 6 | 6 | 7 | 6 | 7 | 7 | 6 | 7 | 7 | 6 | 7 | 7 | 7 |
| 6 | 4 | 6 | 3 | 3 | 4 | 5 | 4 | 4 | 4 | 6 | 6 | 5 | 5 | 7 | 6 | 6 | 6 | 7 | 6 | 7 |
| 6 | 5 | 3 | 6 | 2 | 2 | 2 | 4 | 4 | 3 | 4 | 5 | 7 | 5 | 5 | 5 | 4 | 4 | 6 | 6 | 7 |
| 7 | 6 | 3 | 2 | 6 | 2 | 1 | 1 | 2 | 4 | 5 | 5 | 3 | 5 | 5 | 4 | 4 | 4 | 4 | 4 | 5 |
| 7 | 5 | 4 | 2 | 2 | 6 | 2 | 1 | 0 | 1 | 3 | 5 | 3 | 5 | 3 | 1 | 2 | 3 | 4 | 4 | 5 |
| 7 | 5 | 5 | 2 | 1 | 2 | 6 | 3 | 3 | 3 | 3 | 3 | 3 | 4 | 5 | 2 | 1 | 0 | 1 | 2 | 5 |
| 7 | 7 | 4 | 4 | 1 | 1 | 2 | 7 | 5 | 3 | 5 | 2 | 0 | 1 | 2 | 4 | 4 | 2 | 1 | 0 | 3 |
| 7 | 6 | 4 | 4 | 2 | 0 | 1 | 4 | 3 | 0 | 3 | 3 | 0 | 0 | 0 | 1 | 2 | 4 | 4 | 2 | 4 |
| 7 | 6 | 4 | 3 | 4 | 1 | 0 | 2 | 5 | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 4 | 6 |
| 7 | 7 | 6 | 3 | 4 | 2 | 0 | 1 | 2 | 5 | 8 | 5 | 3 | 3 | 3 | 3 | 2 | 1 | 0 | 1 | 4 |
| 7 | 6 | 6 | 3 | 2 | 4 | 1 | 0 | 0 | 3 | 5 | 3 | 3 | 3 | 3 | 3 | 5 | 2 | 0 | 0 | 3 |
| 7 | 7 | 5 | 4 | 2 | 4 | 2 | 0 | 0 | 2 | 5 | 2 | 1 | 0 | 0 | 0 | 3 | 3 | 0 | 0 | 3 |
| 7 | 7 | 5 | 5 | 2 | 2 | 4 | 1 | 0 | 1 | 2 | 5 | 2 | 0 | 0 | 0 | 3 | 3 | 0 | 0 | 3 |
| 7 | 6 | 7 | 5 | 3 | 1 | 4 | 2 | 0 | 0 | 0 | 2 | 5 | 3 | 3 | 3 | 5 | 2 | 0 | 0 | 3 |
| 7 | 7 | 6 | 5 | 4 | 1 | 2 | 4 | 1 | 0 | 0 | 1 | 2 | 3 | 4 | 7 | 4 | 2 | 0 | 0 | 3 |
| 7 | 7 | 6 | 4 | 4 | 2 | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 6 | 2 | 1 | 0 | 3 |
| 7 | 6 | 6 | 4 | 4 | 3 | 0 | 2 | 4 | 2 | 2 | 3 | 3 | 2 | 1 | 1 | 2 | 6 | 2 | 1 | 3 |
| 7 | 7 | 7 | 6 | 4 | 4 | 1 | 1 | 4 | 4 | 5 | 3 | 3 | 5 | 2 | 0 | 1 | 2 | 6 | 2 | 4 |
| 7 | 7 | 6 | 6 | 4 | 4 | 2 | 0 | 2 | 6 | 5 | 3 | 3 | 5 | 2 | 0 | 0 | 1 | 2 | 6 | 5 |
| 8 | 7 | 7 | 7 | 5 | 5 | 5 | 3 | 4 | 6 | 6 | 6 | 6 | 5 | 4 | 3 | 3 | 3 | 4 | 5 | 7 |
A key point is one where the neighbors have great variety in color or class compared to the key point. Let's only select pixels that have at least 7 different neighboring pixels (matching the >= 7 threshold used in the code below) and are not at the border of the image.
# Key points: pixels with at least 7 neighbours differing from the centre.
p_73251a56_in0_sel = p_73251a56_in0_nn >= 7
# Exclude the one-pixel border: its counts are inflated by the zero padding.
p_73251a56_in0_sel[0, :] = False
p_73251a56_in0_sel[-1, :] = False
p_73251a56_in0_sel[:, 0] = False
p_73251a56_in0_sel[:, -1] = False
print("Key points (matrix):")
h(p_73251a56_in0_sel.astype(np.uint8))
# kp is the np.where pair of (row, col) index arrays of selected pixels.
kp = np.where(p_73251a56_in0_sel)
print("\nKey pointse (x, y):")
print("Number of key points:", len(kp[0]))
print("x:", kp[0])
print("y:", kp[1])
Key points (matrix):
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Key pointse (x, y): Number of key points: 24 x: [ 1 1 1 1 1 1 1 1 2 2 3 7 7 10 10 12 13 14 15 15 16 18 18 19] y: [ 7 10 12 13 15 16 18 19 14 18 12 1 7 1 10 1 1 2 1 15 1 1 2 1]
# Show the 3x3 patch around every detected key point.
for xi, yi in zip(*kp):
    h_grid(p_73251a56_in0[xi - 1 : xi + 2, yi - 1 : yi + 2])
C:\Users\mmart\AppData\Local\Temp\ipykernel_22696\2468672464.py:3: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. plt.figure(figsize=s)
Next, let's define another descriptor which is a vector of its neighbors.
def copy_descriptor(arr):
    """Describe each pixel by the flattened values of its 3x3 neighbourhood.

    Returns an array of shape ``arr.shape + (9,)``; the image is zero-padded
    by one pixel so border pixels also get 9 entries (padding contributes 0s).
    """
    shape = list(arr.shape)
    padded = np.pad(arr, 1, "constant")
    # sliding_window_view is the supported, bounds-checked replacement for
    # the original hand-rolled as_strided construction (same windows, same
    # values, no risk of an invalid strides/shape combination).  The unused
    # nDims local from the original is dropped.
    windows = sliding_window_view(padded, (3, 3))
    return windows.reshape(shape + [9])
# Demo: the 9-element neighbourhood-copy descriptor at pixel (1, 1).
p_73251a56_in0_nncopy = copy_descriptor(p_73251a56_in0)
print("Sample pixel neighbors")
print(p_73251a56_in0[0:3, 0:3])
print("\nPixel descriptor with position (1, 1) containing its neighborhood")
print(p_73251a56_in0_nncopy[1, 1])
Sample pixel neighbors [[1 6 1] [6 1 6] [1 6 1]] Pixel descriptor with position (1, 1) containing its neighborhood [1 6 1 6 1 6 1 6 1]
For all detected key points in the previous section, let's get the corresponding descriptors as calculated above.
# Gather the 9-element descriptors of all key points into one (N, 9) array.
desc = np.stack([p_73251a56_in0_nncopy[xi, yi] for xi, yi in zip(*kp)], 0)
Now let's perform analogous transformations for another image from the same ARC task.
# Repeat key-point detection and description on the demo OUTPUT grid.
p_73251a56_out0 = p_73251a56.demo_outputs[0]
p_73251a56_out0_nn = not_neighbourhood_descriptor(p_73251a56_out0)
# Same selection rule: >= 7 differing neighbours, border excluded.
p_73251a56_out0_sel = p_73251a56_out0_nn >= 7
p_73251a56_out0_sel[0, :] = False
p_73251a56_out0_sel[-1, :] = False
p_73251a56_out0_sel[:, 0] = False
p_73251a56_out0_sel[:, -1] = False
p_73251a56_out0_nncopy = copy_descriptor(p_73251a56_out0)
kp_ = np.where(p_73251a56_out0_sel)
desc_ = np.stack([p_73251a56_out0_nncopy[xi, yi] for xi, yi in zip(*kp_)], 0)
print("\nKey points (x, y):")
print("Number of key points:", len(kp_[0]))
print("x:", kp_[0])
print("y:", kp_[1])
Key points (x, y): Number of key points: 20 x: [ 1 1 1 1 1 1 1 1 2 2 7 10 12 13 14 15 16 18 18 19] y: [ 7 10 12 13 15 16 18 19 14 18 1 1 1 1 2 1 1 1 2 1]
# Show the 3x3 patch around every key point of the output grid.
for xi, yi in zip(*kp_):
    h_grid(p_73251a56_out0[xi - 1 : xi + 2, yi - 1 : yi + 2])
Therefore, in the case of two images, we identify their respective key points and generate an additional descriptor designed to effectively characterize a given pixel and its surroundings.
The subsequent phase in the matching process involves defining the matching function. The choice of the matching function is closely tied to the behavior of the descriptor. When dealing with data residing in the same space, it's possible to propose a distance / similarity function for the two descriptors (e.g., in a Euclidean space, you can compute the Euclidean distance between the two vectors).
For the given configuration, where the pixel descriptor is based on its neighboring pixels arranged in a specific order, we can propose a similarity function that calculates the percentage of shared components between the two descriptors.
def nn_sim(desc1, desc2):
    """Similarity of two descriptors: the fraction of positions (along the
    last axis) at which they hold the same value, in [0.0, 1.0].
    Broadcasts like ``==``, so it also works on stacked descriptor arrays."""
    agreement = np.equal(desc1, desc2)
    return agreement.mean(-1)
# Sanity check of the similarity function on toy vectors (3/5 shared -> 0.6).
a = np.array([0, 5, 2, 4, 9])
b = np.array([0, 5, 1, 4, 0])
print(nn_sim(a, b))
print(nn_sim(a, a))
0.6 1.0
# All-pairs similarity between input and output key-point descriptors:
# broadcasting produces an (N_in, N_out) matrix, then the best match per
# input key point is taken with argmax.
desc_sim = nn_sim(desc[:, np.newaxis], desc_[np.newaxis])
print("The size of the similarity matrix:", desc_sim.shape)
print("\nSimilarities for the first input key point:")
print(desc_sim[0])
desc_nearest = np.argmax(desc_sim, -1)
desc_nearest_sim = np.max(desc_sim, -1)
print("\nBest matches:", desc_nearest)
print("\nBest matches similarities:", desc_nearest_sim)
The size of the similarity matrix: (24, 20) Similarities for the first input key point: [1. 0.11111111 0. 0. 0. 0. 0.11111111 0.33333333 0.11111111 0. 0.33333333 0.11111111 0.22222222 0.33333333 0.11111111 0.11111111 0. 0. 0.11111111 0.22222222] Best matches: [ 0 1 2 3 4 5 6 7 8 9 14 10 2 11 12 12 13 14 15 12 16 17 18 19] Best matches similarities: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.33333333 1. 0.22222222 1. 0.22222222 1. 1. 1. 1. 0.22222222 1. 1. 1. 1. ]
# Visualise each query patch next to its best-matching reference patch.
for xi, yi, nearest, nearest_sim in zip(*kp, desc_nearest, desc_nearest_sim):
    xi_, yi_ = kp_[0][nearest], kp_[1][nearest]
    query = p_73251a56_in0[xi - 1 : xi + 2, yi - 1 : yi + 2]
    match = p_73251a56_out0[xi_ - 1 : xi_ + 2, yi_ - 1 : yi_ + 2]
    print("Similarity:", nearest_sim)
    h_grid(np.concatenate([query, match], 1))
Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 0.3333333333333333 Similarity: 1.0 Similarity: 0.2222222222222222 Similarity: 1.0 Similarity: 0.2222222222222222 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 0.2222222222222222 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0 Similarity: 1.0
C:\Users\mmart\AppData\Local\Temp\ipykernel_22696\2468672464.py:3: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. plt.figure(figsize=s)
For the above results, the following facts can be noted:
Therefore, matches should only occur if the similarity is greater than a certain threshold.
# Keep only matches whose similarity exceeds the threshold.
threshold = 0.7
matches = [
    (i, j)
    for i, j in zip(range(len(desc_nearest)), desc_nearest)
    if desc_nearest_sim[i] > threshold
]
print(len(matches))
print(matches)
20 [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (11, 10), (13, 11), (15, 12), (16, 13), (17, 14), (18, 15), (20, 16), (21, 17), (22, 18), (23, 19)]
To assess the similarity between two images, we can introduce additional metrics to measure their similarity, including:
# Coverage scores: fraction of input key points that got a match, and
# fraction of distinct reference key points that received at least one match.
in_kp_score = len(matches) / len(kp[0])
ref_kp_score = len(set([j for _, j in matches])) / len(kp_[0])
print(in_kp_score)
print(ref_kp_score)
0.8333333333333334 1.0
Finally, to find out if the images are similar or not, you can add a condition on the similarity statistics calculated above. The simplest condition will be the cut off condition:
# Final decision: both coverage scores must clear the 0.8 cut-off.
is_similar = in_kp_score > 0.8 and ref_kp_score > 0.8
print(is_similar)
True
Write an algorithm that finds all similar images for the input image (query) and the reference image set. The algorithm should specify the following steps:
Note:
(building a good descriptor can be really demanding),
# Images: load four ARC tasks; their demo outputs form the reference set and
# p1_input is the query image.
p_73251a56 = load_task("training", "73251a56.json")
p1_input = p_73251a56.demo_inputs[0]
p1_output = p_73251a56.demo_outputs
h_grid(
    np.concatenate([p1_input, p1_output[0]], 1),
    scale=0.5,
)
p_72322fa7 = load_task("training", "72322fa7.json")
p2_input = p_72322fa7.demo_inputs[0]
p2_output = p_72322fa7.demo_outputs
h_grid(
    np.concatenate([p2_input, p2_output[0]], 1),
    scale=0.5,
)
p_776ffc46 = load_task("training", "776ffc46.json")
p3_input = p_776ffc46.demo_inputs[0]
p3_output = p_776ffc46.demo_outputs
h_grid(
    np.concatenate([p3_input, p3_output[0]], 1),
    scale=0.5,
)
p_54d82841 = load_task("training", "54d82841.json")
p4_input = p_54d82841.demo_inputs[0]
p4_output = p_54d82841.demo_outputs
h_grid(
    np.concatenate([p4_input, p4_output[0]], 1),
    scale=0.5,
)
# Reference set: selected demo outputs from the four tasks.
ref_images = [p1_output[0],p1_output[1],p1_output[2], p2_output[0],p2_output[1], p3_output[0], p3_output[1], p4_output[0], p4_output[1]]
h(p1_input)
| 1 | 6 | 1 | 1 | 2 | 2 | 3 | 3 | 4 | 4 | 5 | 5 | 6 | 6 | 1 | 1 | 2 | 2 | 3 | 3 | 4 |
| 6 | 1 | 6 | 6 | 1 | 1 | 1 | 2 | 2 | 2 | 3 | 3 | 3 | 4 | 4 | 4 | 5 | 5 | 5 | 6 | 6 |
| 1 | 6 | 1 | 6 | 6 | 6 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 3 | 4 | 4 | 4 |
| 1 | 6 | 6 | 1 | 6 | 6 | 6 | 6 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 3 | 3 | 3 |
| 2 | 1 | 6 | 6 | 1 | 6 | 6 | 6 | 6 | 6 | 1 | 0 | 0 | 0 | 1 | 1 | 2 | 2 | 2 | 2 | 2 |
| 2 | 1 | 6 | 6 | 6 | 1 | 6 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 2 | 2 |
| 3 | 1 | 1 | 6 | 6 | 6 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| 3 | 2 | 1 | 6 | 6 | 6 | 6 | 1 | 0 | 0 | 0 | 6 | 6 | 6 | 6 | 6 | 1 | 1 | 1 | 1 | 1 |
| 4 | 2 | 1 | 1 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 | 1 | 1 |
| 4 | 2 | 1 | 1 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 |
| 5 | 3 | 2 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 |
| 5 | 3 | 2 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 6 | 6 | 6 |
| 6 | 3 | 2 | 1 | 1 | 1 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 6 | 6 | 6 |
| 6 | 4 | 2 | 2 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 0 | 0 | 6 | 6 | 6 | 6 |
| 1 | 4 | 3 | 2 | 1 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 0 | 0 | 0 | 0 | 0 | 6 | 6 | 6 | 6 |
| 1 | 4 | 3 | 2 | 1 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 | 6 | 6 | 6 | 6 | 6 |
| 2 | 5 | 3 | 2 | 2 | 1 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 | 6 | 6 | 6 | 6 |
| 2 | 5 | 3 | 2 | 2 | 1 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 | 6 | 6 | 6 |
| 3 | 5 | 4 | 3 | 2 | 1 | 1 | 1 | 1 | 6 | 0 | 0 | 0 | 0 | 6 | 6 | 6 | 6 | 1 | 6 | 6 |
| 3 | 6 | 4 | 3 | 2 | 2 | 1 | 1 | 1 | 6 | 0 | 0 | 0 | 0 | 6 | 6 | 6 | 6 | 6 | 1 | 6 |
| 4 | 6 | 4 | 3 | 2 | 2 | 1 | 1 | 1 | 1 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 1 |
#descriptor
def my_descriptor(img):
    """Mean absolute difference between each pixel and its 3x3 neighbourhood.

    High values mark pixels whose neighbourhood differs strongly from them
    (edges/corners); values near 0 mark flat regions.  The image is
    zero-padded, so border pixels are also compared against 0-valued
    padding.  Returns a float array with the same shape as `img`.
    """
    # Pad by 1 so each pixel owns exactly one 3x3 window.  (The original
    # padded by 2 and trimmed the result; the outer padding ring never
    # entered any kept window, so padding by 1 is equivalent and cheaper.)
    padded = np.pad(img, 1, mode="constant", constant_values=0)
    windows = sliding_window_view(padded, (3, 3))
    # windows[i, j, 1, 1] is pixel (i, j) itself.
    centers = windows[:, :, 1, 1]
    deviation = np.abs(windows - centers[:, :, None, None])
    # Average over the 9 window cells (centre contributes 0).
    return deviation.mean(axis=(-1, -2))
# Apply the deviation descriptor to the query image and inspect one value.
desc = my_descriptor(p1_input)
print("A pixel descriptor that specifies how much on average the neighbouring pixels differ form the center pixel:")
h(desc)
print(desc.shape)
print(desc[19,19])
A pixel descriptor that specifies how much on average the neighbouring pixels differ form the center pixel:
| 1.67 | 3.67 | 2.00 | 1.56 | 1.44 | 1.11 | 1.67 | 1.56 | 2.11 | 2.00 | 2.56 | 2.44 | 3.00 | 3.33 | 1.89 | 1.56 | 1.67 | 1.78 | 1.89 | 2.00 | 2.78 |
| 3.67 | 2.22 | 2.22 | 2.67 | 2.44 | 1.56 | 1.22 | 0.89 | 0.89 | 1.11 | 1.11 | 1.11 | 1.33 | 1.44 | 1.33 | 1.33 | 1.89 | 1.44 | 1.33 | 1.67 | 3.00 |
| 2.00 | 2.22 | 3.33 | 1.67 | 1.67 | 2.22 | 2.33 | 1.33 | 0.89 | 0.56 | 0.67 | 0.67 | 0.67 | 0.78 | 0.78 | 0.78 | 0.89 | 1.00 | 1.00 | 0.89 | 2.00 |
| 1.56 | 2.67 | 1.67 | 3.33 | 1.11 | 1.11 | 1.11 | 2.22 | 2.22 | 1.22 | 0.89 | 0.56 | 0.78 | 0.78 | 0.67 | 0.56 | 0.44 | 0.56 | 0.67 | 0.67 | 1.44 |
| 1.44 | 2.44 | 1.67 | 1.11 | 3.33 | 1.11 | 0.56 | 0.56 | 1.11 | 2.22 | 1.89 | 1.11 | 0.44 | 0.78 | 0.56 | 0.44 | 0.44 | 0.44 | 0.44 | 0.44 | 0.89 |
| 1.11 | 1.56 | 2.22 | 1.11 | 1.11 | 3.33 | 1.11 | 0.56 | 0.00 | 0.56 | 1.89 | 2.78 | 2.00 | 1.67 | 0.78 | 0.11 | 0.22 | 0.33 | 0.44 | 0.44 | 0.89 |
| 1.67 | 1.22 | 2.33 | 1.11 | 0.56 | 1.11 | 3.33 | 1.78 | 1.89 | 2.00 | 2.00 | 2.00 | 2.00 | 2.44 | 2.33 | 1.11 | 0.56 | 0.00 | 0.11 | 0.22 | 0.56 |
| 1.56 | 0.89 | 1.33 | 2.22 | 0.56 | 0.56 | 1.11 | 3.00 | 2.78 | 2.00 | 3.33 | 1.33 | 0.00 | 0.56 | 1.11 | 2.22 | 2.22 | 1.11 | 0.56 | 0.00 | 0.33 |
| 2.11 | 0.89 | 0.89 | 2.22 | 1.11 | 0.00 | 0.56 | 2.56 | 1.44 | 0.00 | 2.00 | 2.00 | 0.00 | 0.00 | 0.00 | 0.56 | 1.11 | 2.22 | 2.22 | 1.11 | 0.89 |
| 2.00 | 1.11 | 0.56 | 1.22 | 2.22 | 0.56 | 0.00 | 1.33 | 3.33 | 1.44 | 2.78 | 1.89 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.56 | 1.11 | 2.22 | 2.00 |
| 2.56 | 1.11 | 0.67 | 0.78 | 2.22 | 1.11 | 0.00 | 0.67 | 1.33 | 3.22 | 2.67 | 3.22 | 2.00 | 2.00 | 2.00 | 2.00 | 1.33 | 0.67 | 0.00 | 0.56 | 2.56 |
| 2.44 | 1.11 | 0.67 | 0.33 | 1.11 | 2.22 | 0.56 | 0.00 | 0.00 | 1.89 | 2.78 | 1.44 | 2.00 | 2.00 | 2.00 | 2.00 | 3.33 | 1.33 | 0.00 | 0.00 | 2.00 |
| 3.00 | 1.33 | 0.67 | 0.44 | 0.67 | 2.22 | 1.11 | 0.00 | 0.00 | 1.33 | 3.33 | 1.33 | 0.67 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 0.00 | 0.00 | 2.00 |
| 3.33 | 1.44 | 0.78 | 0.56 | 0.22 | 1.11 | 2.22 | 0.56 | 0.00 | 0.67 | 1.33 | 3.33 | 1.33 | 0.00 | 0.00 | 0.00 | 2.00 | 2.00 | 0.00 | 0.00 | 2.00 |
| 1.89 | 1.33 | 0.78 | 0.56 | 0.33 | 0.56 | 2.22 | 1.11 | 0.00 | 0.00 | 0.00 | 1.33 | 3.33 | 2.00 | 1.44 | 1.44 | 2.78 | 1.33 | 0.00 | 0.00 | 2.00 |
| 1.56 | 1.33 | 0.78 | 0.56 | 0.44 | 0.11 | 1.11 | 2.22 | 0.56 | 0.00 | 0.00 | 0.67 | 1.33 | 2.00 | 2.56 | 2.56 | 2.44 | 1.22 | 0.00 | 0.00 | 2.00 |
| 1.67 | 1.89 | 0.89 | 0.44 | 0.44 | 0.22 | 0.56 | 2.22 | 1.11 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.56 | 1.11 | 3.33 | 1.11 | 0.56 | 0.00 | 2.00 |
| 1.78 | 1.44 | 1.00 | 0.56 | 0.44 | 0.33 | 0.00 | 1.11 | 2.22 | 1.22 | 1.33 | 2.00 | 2.00 | 1.33 | 0.67 | 0.56 | 1.11 | 3.33 | 1.11 | 0.56 | 2.00 |
| 1.89 | 1.33 | 1.00 | 0.67 | 0.44 | 0.44 | 0.11 | 0.56 | 2.22 | 2.44 | 3.33 | 2.00 | 2.00 | 3.33 | 1.33 | 0.00 | 0.56 | 1.11 | 3.33 | 1.11 | 2.56 |
| 2.00 | 1.67 | 0.89 | 0.67 | 0.44 | 0.44 | 0.22 | 0.00 | 1.11 | 3.56 | 2.78 | 2.00 | 2.00 | 3.33 | 1.33 | 0.00 | 0.00 | 0.56 | 1.11 | 3.33 | 3.11 |
| 2.78 | 3.00 | 2.00 | 1.44 | 0.89 | 0.89 | 0.56 | 0.33 | 0.89 | 1.56 | 3.89 | 4.00 | 4.00 | 3.33 | 2.67 | 2.00 | 2.00 | 2.00 | 2.56 | 3.11 | 1.67 |
(21, 21) 3.3333333333333335
#looking for the keypoints
def find_keypoints(descriptor, treshold = 2, border = 1):
    """Select keypoints as pixels whose descriptor response reaches a threshold.

    Parameters:
        descriptor: 2D response map (higher value = stronger keypoint candidate).
        treshold: minimum response for a pixel to be considered a keypoint.
        border: width (in pixels) of the image border to exclude; descriptors
            near the edge are computed from padded, artificial values, so they
            are not trustworthy. Defaults to 1 (the original behaviour).

    Returns:
        keypoints: (row_indices, col_indices) tuple, as produced by np.where.
        num_kp_with_border: candidate count before the border exclusion
            (kept for comparison in the notebook's printouts).
        number_of_keypoints: final keypoint count after the exclusion.
        possible_key_points: boolean mask of the accepted keypoints.
    """
    possible_key_points = descriptor >= treshold #finding the pixels that have a higher value than the treshold
    kp_with_border = np.where(possible_key_points)
    num_kp_with_border = len(kp_with_border[0]) #number of key points with border
    #excluding a border of the given width around the picture
    if border > 0:
        possible_key_points[:border, :] = False
        possible_key_points[-border:, :] = False
        possible_key_points[:, :border] = False
        possible_key_points[:, -border:] = False
    keypoints = np.where(possible_key_points) #finding the key points indexes
    number_of_keypoints = len(keypoints[0])
    return keypoints, num_kp_with_border, number_of_keypoints, possible_key_points
# Run keypoint detection on the descriptor map computed earlier and report how
# many candidates survive the border exclusion.
keypoints, num_kp_with_border, number_of_keypoints, possible_key_points = find_keypoints(desc)
print(f"Number of key points that we would have without excluding the picture's border: {num_kp_with_border}")
print(f"Number of actual key points (with border exclusion): {number_of_keypoints}")
# h() renders the 0/1 keypoint mask as an HTML table (helper defined earlier in the notebook).
h(possible_key_points.astype(np.uint8))
# NOTE(review): keypoints comes from np.where, so keypoints[0] are row indices
# and keypoints[1] are column indices; the x/y labels below follow that order.
print("Key points x and y:")
print(f"x: {keypoints[0]}")
print(f"y: {keypoints[1]}")
Number of key points that we would have without excluding the picture's border: 140 Number of actual key points (with border exclusion): 95
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
| 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Key points x and y: x: [ 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 5 5 5 5 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 8 8 8 8 8 8 9 9 9 9 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 12 12 12 12 13 13 13 13 14 14 14 14 15 15 15 15 15 16 16 17 17 17 17 18 18 18 18 18 18 18 19 19 19 19 19 19] y: [ 1 2 3 4 1 2 5 6 1 3 7 8 1 4 9 2 5 11 12 2 6 9 10 11 12 13 14 3 7 8 9 10 15 16 3 7 10 11 17 18 4 8 10 19 4 9 10 11 12 13 14 15 5 10 12 13 14 15 16 5 10 16 17 6 11 16 17 6 12 13 16 7 13 14 15 16 7 16 8 11 12 17 8 9 10 11 12 13 18 9 10 11 12 13 19]
# transformation of the query image into a list of key points and their descriptors
# (reuses the sliding-window logic of the copy_descriptor function from above)
def keypoints_descriptors(img):
    """Return a (num_keypoints, 9) array of flattened 3x3 patch descriptors.

    The image is scored with my_descriptor(), keypoints are selected with
    find_keypoints(), and for every keypoint the raw 3x3 pixel neighbourhood
    of the padded image (flattened to 9 values) is used as its descriptor.
    """
    desc = my_descriptor(img)
    keypoints, _, _, _ = find_keypoints(desc)
    shape = list(img.shape)
    # Pad by 2 so every 3x3 window read below stays within bounds.
    img = np.pad(img, 2, 'constant')
    # Zero-copy view of every 3x3 window of the padded image, flattened to 9
    # values per pixel. NOTE(review): with a pad of 2, the window indexed at
    # (x, y) is centred on original pixel (x-1, y-1) — slightly offset from
    # the keypoint itself; kept as-is to match the earlier copy_descriptor
    # code and the recorded outputs. TODO confirm this offset is intended.
    new_strides = img.strides + img.strides
    windows = as_strided(img, shape=shape + [3, 3], strides=new_strides).reshape(shape + [9])
    # Gather all keypoint descriptors in one vectorised indexing operation
    # (also handles the zero-keypoint case, where np.stack would raise).
    descriptors = windows[keypoints[0], keypoints[1]]
    return descriptors
# Demo: compute and display the descriptors of the query image's keypoints.
print(p1_input.shape)
descriptors= keypoints_descriptors(p1_input)
print("Descriptors of the keypoints:")
print(descriptors)
(21, 21) Descriptors of the keypoints: [[0 0 0 0 1 6 0 6 1] [0 0 0 1 6 1 6 1 6] [0 0 0 6 1 1 1 6 6] [0 0 0 1 1 2 6 6 1] [0 1 6 0 6 1 0 1 6] [1 6 1 6 1 6 1 6 1] [1 2 2 6 1 1 6 6 6] [2 2 3 1 1 1 6 6 1] [0 6 1 0 1 6 0 1 6] [1 6 6 6 1 6 6 6 1] [1 1 2 6 1 1 6 6 6] [1 2 2 1 1 1 6 6 1] [0 1 6 0 1 6 0 2 1] [1 6 6 6 1 6 6 6 1] [1 1 1 6 1 1 6 6 6] [1 6 6 2 1 6 2 1 6] [1 6 6 6 1 6 6 6 1] [1 1 1 6 1 0 6 6 0] [1 1 1 1 0 0 6 0 0] [2 1 6 2 1 6 3 1 1] [1 6 6 6 1 6 6 6 1] [6 6 6 6 6 6 6 6 6] [6 6 1 6 6 6 6 6 6] [6 1 0 6 6 0 6 6 6] [1 0 0 6 0 0 6 6 6] [0 0 0 0 0 0 6 6 6] [0 0 1 0 0 1 6 6 1] [1 6 6 1 1 6 2 1 6] [1 6 6 6 1 6 6 6 1] [6 6 6 1 6 6 6 1 0] [6 6 6 6 6 6 1 0 0] [6 6 6 6 6 6 0 0 0] [0 1 1 6 1 1 6 6 6] [1 1 1 1 1 1 6 6 1] [1 1 6 2 1 6 2 1 1] [6 1 6 6 6 1 6 6 6] [6 6 6 0 0 0 0 0 0] [6 6 6 0 0 6 0 0 6] [1 1 1 6 1 1 6 6 6] [1 1 1 1 1 1 6 6 1] [1 6 6 1 1 6 1 1 6] [6 1 0 6 6 0 6 6 0] [0 0 0 0 0 0 0 0 0] [1 1 1 6 1 1 6 6 6] [1 1 6 1 1 6 2 1 1] [6 0 0 6 0 0 6 6 6] [0 0 0 0 0 0 6 6 1] [0 0 6 0 0 6 6 1 6] [0 6 6 0 6 6 1 6 6] [6 6 6 6 6 6 6 6 6] [6 6 6 6 6 6 6 6 6] [6 6 6 6 6 6 6 6 6] [1 6 6 1 1 6 1 1 6] [0 0 0 6 6 1 6 6 0] [0 6 6 1 6 6 0 0 0] [6 6 6 6 6 6 0 0 0] [6 6 6 6 6 6 0 0 0] [6 6 6 6 6 6 0 0 0] [6 6 6 6 6 6 0 0 0] [1 1 6 1 1 6 1 1 1] [6 6 1 6 6 0 6 6 0] [6 6 6 0 0 0 0 0 0] [6 6 6 0 0 6 0 0 6] [1 6 6 1 1 6 1 1 6] [6 0 0 6 0 0 6 6 6] [0 0 0 0 0 0 0 0 0] [0 0 6 0 0 6 0 0 6] [1 1 6 1 1 6 1 1 1] [0 0 0 6 6 0 6 6 0] [0 0 0 6 0 0 6 0 0] [0 0 0 0 0 0 0 0 0] [1 6 6 1 1 6 1 1 6] [6 0 0 6 0 0 6 6 6] [0 0 0 0 0 0 6 6 6] [0 0 0 0 0 0 6 6 1] [0 0 0 0 0 0 6 1 6] [1 1 6 1 1 6 1 1 1] [0 0 0 6 1 6 6 6 1] [1 6 6 1 1 6 1 1 6] [6 6 6 6 6 6 6 6 6] [6 6 6 6 6 6 6 6 6] [1 6 6 6 1 6 6 6 1] [1 1 6 1 1 6 1 1 1] [1 6 6 1 6 6 1 1 6] [6 6 6 6 6 6 1 6 0] [6 6 6 6 6 6 6 0 0] [6 6 6 6 6 6 0 0 0] [6 6 6 6 6 6 0 0 0] [1 6 6 6 1 6 6 6 1] [1 6 6 1 1 6 1 1 6] [6 6 6 1 6 0 1 6 0] [6 6 6 6 0 0 6 0 0] [6 6 6 0 0 0 0 0 0] [6 6 6 0 0 0 0 0 0] [1 6 6 6 1 6 6 6 1]]
#at this point google colab failed to save the code
#transformation of the reference image set into a set of lists of key points and their descriptors
def descriptors_for_kp_of_refset(ref_images):
    """Compute the keypoint descriptors of every image in the reference set."""
    return [keypoints_descriptors(image) for image in ref_images]
# Compute descriptors for each reference image; the bare name on the last line
# displays the resulting list of arrays in the notebook.
descs= descriptors_for_kp_of_refset(ref_images)
descs
[array([[0, 0, 0, 0, 1, 6, 0, 6, 1],
[0, 0, 0, 1, 6, 1, 6, 1, 6],
[0, 0, 0, 6, 1, 1, 1, 6, 6],
[0, 0, 0, 1, 1, 2, 6, 6, 1],
[0, 1, 6, 0, 6, 1, 0, 1, 6],
[1, 6, 1, 6, 1, 6, 1, 6, 1],
[1, 2, 2, 6, 1, 1, 6, 6, 6],
[2, 2, 3, 1, 1, 1, 6, 6, 1],
[0, 6, 1, 0, 1, 6, 0, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 2, 6, 1, 1, 6, 6, 6],
[1, 2, 2, 1, 1, 1, 6, 6, 1],
[0, 1, 6, 0, 1, 6, 0, 2, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 1, 6, 1, 1, 6, 6, 6],
[1, 1, 2, 1, 1, 1, 6, 6, 1],
[1, 6, 6, 2, 1, 6, 2, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 1, 6, 1, 1, 6, 6, 6],
[1, 1, 1, 1, 1, 1, 6, 6, 1],
[2, 1, 6, 2, 1, 6, 3, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 1, 6, 1, 1, 6, 6, 6],
[1, 1, 1, 1, 1, 1, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 2, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 1, 6, 1, 1, 6, 6, 6],
[1, 1, 1, 1, 1, 1, 6, 6, 1],
[1, 1, 6, 2, 1, 6, 2, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 1, 6, 1, 1, 6, 6, 6],
[1, 1, 1, 1, 1, 1, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 1, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 1, 6, 1, 1, 6, 6, 6],
[1, 1, 6, 1, 1, 6, 2, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 1, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 6, 1, 1, 6, 1, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 1, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 6, 1, 1, 6, 1, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 1, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 6, 1, 1, 6, 1, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 1, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 1, 6, 1, 1, 6, 1, 1, 1],
[1, 6, 6, 6, 1, 6, 6, 6, 1],
[1, 6, 6, 1, 1, 6, 1, 1, 6],
[1, 6, 6, 6, 1, 6, 6, 6, 1]]),
array([[0, 0, 0, 7, 1, 1, 6, 6, 7],
[0, 0, 0, 1, 1, 2, 6, 7, 7],
[0, 0, 0, 1, 2, 2, 7, 7, 7],
[0, 0, 0, 2, 2, 3, 7, 7, 1],
[0, 0, 0, 2, 3, 3, 7, 1, 1],
[0, 0, 0, 3, 3, 4, 1, 1, 1],
[0, 0, 0, 3, 4, 4, 1, 1, 2],
[2, 3, 3, 7, 1, 1, 6, 7, 7],
[3, 3, 4, 1, 1, 1, 7, 7, 7],
[3, 4, 4, 1, 1, 2, 7, 7, 7],
[4, 4, 5, 1, 2, 2, 7, 7, 1],
[4, 5, 5, 2, 2, 2, 7, 1, 1],
[5, 5, 6, 2, 2, 3, 1, 1, 1],
[5, 6, 6, 2, 3, 3, 1, 1, 1],
[2, 2, 2, 7, 1, 1, 7, 7, 7],
[2, 2, 3, 1, 1, 1, 7, 7, 7],
[2, 3, 3, 1, 1, 1, 7, 7, 7],
[3, 3, 3, 1, 1, 2, 7, 7, 1],
[3, 3, 4, 1, 2, 2, 7, 1, 1],
[1, 2, 2, 7, 1, 1, 7, 7, 7],
[0, 7, 6, 0, 1, 6, 0, 1, 7],
[0, 1, 6, 0, 1, 7, 0, 2, 7],
[0, 1, 7, 0, 2, 7, 0, 2, 7],
[0, 2, 7, 0, 2, 7, 0, 3, 1],
[0, 2, 7, 0, 3, 1, 0, 3, 1],
[2, 7, 6, 3, 1, 7, 3, 1, 7],
[0, 3, 1, 0, 3, 1, 0, 4, 1],
[3, 1, 7, 3, 1, 7, 4, 1, 7],
[0, 3, 1, 0, 4, 1, 0, 4, 2],
[3, 1, 7, 4, 1, 7, 4, 2, 7],
[4, 1, 7, 4, 2, 7, 5, 2, 1],
[4, 2, 7, 5, 2, 1, 5, 2, 1],
[2, 7, 7, 2, 1, 7, 2, 1, 7],
[5, 2, 1, 5, 2, 1, 6, 3, 1],
[2, 1, 7, 2, 1, 7, 3, 1, 7],
[5, 2, 1, 6, 3, 1, 6, 3, 1],
[2, 1, 7, 3, 1, 7, 3, 1, 7],
[3, 1, 7, 3, 1, 7, 3, 2, 1],
[3, 1, 7, 3, 2, 1, 4, 2, 1],
[1, 7, 7, 2, 1, 7, 2, 1, 7]]),
array([[0, 0, 0, 8, 8, 1, 6, 6, 7],
[0, 0, 0, 8, 1, 1, 6, 7, 7],
[0, 0, 0, 1, 1, 2, 7, 7, 7],
[0, 0, 0, 1, 2, 2, 7, 7, 8],
[0, 0, 0, 2, 2, 3, 7, 8, 8],
[0, 0, 0, 2, 3, 3, 8, 8, 8],
[0, 0, 0, 3, 3, 4, 8, 8, 1],
[0, 0, 0, 3, 4, 4, 8, 1, 1],
[0, 0, 0, 4, 4, 5, 1, 1, 1],
[0, 0, 0, 4, 5, 5, 1, 1, 2],
[3, 4, 4, 8, 1, 1, 7, 7, 7],
[4, 4, 5, 1, 1, 1, 7, 7, 8],
[4, 5, 5, 1, 1, 2, 7, 8, 8],
[0, 8, 6, 0, 8, 6, 0, 1, 7],
[0, 8, 6, 0, 1, 7, 0, 1, 7],
[0, 1, 7, 0, 1, 7, 0, 2, 7],
[0, 1, 7, 0, 2, 7, 0, 2, 8],
[0, 2, 7, 0, 2, 8, 0, 3, 8],
[0, 2, 8, 0, 3, 8, 0, 3, 8],
[0, 3, 8, 0, 3, 8, 0, 4, 1],
[0, 3, 8, 0, 4, 1, 0, 4, 1],
[3, 8, 7, 4, 1, 7, 4, 1, 7],
[0, 4, 1, 0, 4, 1, 0, 5, 1],
[4, 1, 7, 4, 1, 7, 5, 1, 8],
[0, 4, 1, 0, 5, 1, 0, 5, 2],
[4, 1, 7, 5, 1, 8, 5, 2, 8]]),
array([[0, 0, 0, 0, 1, 0, 0, 0, 3],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 0, 0, 8, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 8, 0, 8, 6],
[0, 0, 0, 0, 8, 0, 8, 6, 8],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 0, 0, 8, 0],
[0, 0, 0, 0, 0, 8, 0, 0, 0],
[0, 0, 8, 0, 8, 6, 0, 0, 8],
[0, 8, 0, 8, 6, 8, 0, 8, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 8, 0, 8, 6],
[0, 0, 0, 0, 8, 0, 8, 6, 8],
[0, 0, 0, 0, 0, 8, 0, 0, 0],
[0, 0, 8, 0, 8, 6, 0, 0, 8],
[0, 8, 0, 8, 6, 8, 0, 8, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 0, 0, 8, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 0, 0, 8, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 3],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 8, 0, 8, 6],
[0, 0, 0, 0, 8, 0, 8, 6, 8],
[0, 0, 0, 0, 0, 0, 0, 0, 8],
[0, 0, 0, 0, 0, 8, 0, 8, 6],
[0, 0, 0, 0, 8, 0, 8, 6, 8],
[0, 0, 0, 0, 0, 8, 0, 0, 0],
[0, 0, 8, 0, 8, 6, 0, 0, 8],
[0, 8, 0, 8, 6, 8, 0, 8, 0],
[0, 0, 0, 0, 0, 8, 0, 0, 0],
[0, 0, 8, 0, 8, 6, 0, 0, 8],
[0, 8, 0, 8, 6, 8, 0, 8, 0]]),
array([[0, 0, 0, 0, 0, 0, 0, 0, 4],
[0, 0, 0, 0, 0, 0, 0, 4, 8],
[0, 0, 0, 0, 0, 0, 4, 8, 4],
[0, 0, 0, 0, 0, 0, 0, 0, 4],
[0, 0, 0, 0, 0, 0, 0, 4, 8],
[0, 0, 0, 0, 0, 0, 0, 4, 8],
[0, 0, 0, 0, 0, 0, 4, 8, 4],
[0, 0, 0, 0, 0, 0, 0, 0, 4],
[0, 0, 0, 0, 0, 0, 0, 4, 8],
[0, 0, 0, 0, 0, 0, 4, 8, 4]]),
array([[0, 0, 0, 0, 5, 5, 0, 5, 0],
[0, 0, 0, 5, 5, 5, 0, 0, 0],
[0, 0, 0, 5, 5, 5, 0, 0, 5],
[5, 5, 5, 0, 0, 5, 0, 0, 5],
[0, 0, 5, 0, 0, 5, 2, 0, 5],
[0, 0, 5, 2, 0, 5, 0, 0, 5],
[0, 5, 0, 0, 5, 0, 0, 5, 0],
[2, 2, 0, 2, 0, 0, 0, 0, 0],
[2, 0, 5, 0, 0, 5, 0, 0, 5],
[0, 5, 0, 0, 5, 0, 0, 5, 5],
[5, 0, 0, 5, 0, 0, 5, 5, 5],
[0, 0, 2, 0, 0, 0, 5, 5, 5],
[0, 2, 0, 0, 0, 0, 5, 5, 5],
[2, 0, 0, 0, 0, 0, 5, 5, 5],
[0, 0, 5, 0, 0, 5, 5, 5, 5]]),
array([[0, 0, 0, 0, 0, 0, 0, 0, 5],
[0, 0, 0, 0, 0, 0, 0, 5, 5],
[0, 0, 0, 0, 0, 0, 5, 5, 5],
[0, 0, 0, 0, 0, 0, 5, 5, 5],
[0, 0, 0, 0, 0, 0, 5, 5, 5],
[0, 0, 0, 0, 0, 0, 5, 5, 5],
[0, 0, 0, 0, 0, 0, 5, 5, 5],
[0, 0, 0, 0, 0, 5, 0, 0, 5],
[0, 0, 0, 0, 5, 5, 0, 5, 0],
[0, 0, 0, 5, 5, 5, 5, 0, 0],
[0, 0, 0, 5, 5, 5, 0, 0, 0],
[0, 0, 0, 5, 5, 5, 0, 0, 0],
[0, 0, 0, 5, 5, 5, 0, 0, 0],
[0, 0, 0, 5, 5, 5, 0, 0, 5],
[0, 0, 5, 0, 0, 5, 0, 0, 5],
[0, 5, 5, 0, 5, 0, 0, 5, 0],
[5, 5, 5, 0, 0, 0, 3, 3, 0],
[5, 5, 5, 0, 0, 5, 3, 0, 5],
[0, 0, 5, 0, 0, 5, 0, 0, 5],
[0, 5, 0, 0, 5, 0, 0, 5, 0],
[0, 0, 0, 3, 3, 0, 3, 3, 0],
[0, 0, 5, 3, 0, 5, 3, 0, 5],
[0, 0, 5, 0, 0, 5, 0, 0, 5],
[0, 5, 0, 0, 5, 0, 0, 5, 0],
[3, 3, 0, 3, 3, 0, 3, 3, 0],
[3, 0, 5, 3, 0, 5, 3, 0, 5],
[0, 0, 5, 0, 0, 5, 0, 0, 5],
[0, 5, 0, 0, 5, 0, 0, 5, 0],
[5, 0, 3, 5, 0, 3, 5, 0, 0],
[0, 3, 3, 0, 3, 3, 0, 0, 0],
[3, 3, 3, 3, 3, 3, 0, 0, 0],
[3, 3, 0, 3, 3, 0, 0, 0, 0],
[3, 0, 5, 3, 0, 5, 0, 0, 5],
[0, 0, 5, 0, 0, 5, 0, 0, 5],
[0, 5, 0, 0, 5, 0, 0, 5, 5],
[5, 0, 3, 5, 0, 0, 5, 5, 5],
[0, 3, 3, 0, 0, 0, 5, 5, 5],
[3, 3, 3, 0, 0, 0, 5, 5, 5],
[3, 3, 0, 0, 0, 0, 5, 5, 5],
[3, 0, 5, 0, 0, 5, 5, 5, 5]]),
array([[0, 0, 0, 0, 0, 6, 0, 0, 6],
[0, 0, 0, 0, 6, 6, 0, 6, 0],
[0, 0, 0, 6, 6, 6, 6, 0, 6],
[0, 0, 0, 6, 6, 0, 0, 6, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[6, 6, 0, 0, 6, 0, 0, 0, 0],
[6, 0, 0, 6, 0, 0, 0, 0, 6],
[0, 0, 0, 0, 0, 0, 0, 6, 6],
[6, 0, 0, 0, 0, 6, 0, 0, 6],
[0, 0, 0, 0, 6, 6, 0, 6, 0]]),
array([[0, 0, 0, 0, 0, 3, 0, 0, 3],
[0, 0, 0, 3, 3, 3, 3, 0, 3]])]
#matching the key points of the query and the reference image and determining the similarity and displaying the most similar image from the reference set
def sim(descriptor1, descriptor2):
    # Fraction of positions (along the last axis) where the two descriptors agree.
    return np.mean(descriptor1 == descriptor2, axis=-1)
def similarity(image1, image2):
    """Mean best-match similarity between the keypoint descriptors of two images."""
    desc_a = keypoints_descriptors(image1)
    desc_b = keypoints_descriptors(image2)
    # All-pairs similarity via broadcasting, then the best match per keypoint of image1.
    pairwise = sim(desc_a[:, np.newaxis], desc_b[np.newaxis])
    best_per_keypoint = pairwise.max(-1)
    # Average the best scores over all keypoints of image1.
    return best_per_keypoint.mean()
def find_most_similiar_image(query, ref_images):
    """Return the reference image most similar to the query, with its score.

    Returns (None, 0) when ref_images is empty or no image scores above 0.
    """
    best_image = None
    best_score = 0
    for candidate in ref_images:
        candidate_score = similarity(query, candidate)
        # Strict comparison: on ties, the earliest reference image wins.
        if candidate_score > best_score:
            best_score = candidate_score
            best_image = candidate
    return best_image, best_score
# Query with p1_input: show it next to its best match from the reference set.
query_response, score = find_most_similiar_image(p1_input, ref_images)
h_grid(np.concatenate([p1_input, query_response],1), scale=0.3)
print(f"Similarity between query- p1_input and query response: {score}")
Similarity between query- p1_input and query response: 0.7415204678362571
# Query with p2_input.
query_response, score = find_most_similiar_image(p2_input, ref_images)
print(f"Similarity between query- p2_input and query response: {score}")
h_grid(np.concatenate([p2_input, query_response],1), scale=0.5)
Similarity between query- p2_input and query response: 0.9629629629629629
# Query with p3_input.
query_response, score = find_most_similiar_image(p3_input, ref_images)
print(f"Similarity between query- p3_input and query response: {score}")
h_grid(np.concatenate([p3_input, query_response],1), scale=0.3)
Similarity between query- p3_input and query response: 1.0
# Query with p4_input.
query_response, score = find_most_similiar_image(p4_input, ref_images)
print(f"Similarity between query- p4_input and query response: {score}")
h_grid(np.concatenate([p4_input, query_response],1), scale=0.3)
Similarity between query- p4_input and query response: 1.0
The OpenCV library contains a set of implementations of popular descriptors and additional functionalities for finding matches and for displaying processing results.
A good image descriptor should be invariant to the following variations:
Each of the descriptors in the OpenCV library has the following functions:
Algorithm presented in the scientific article 'A Combined Corner and Edge Detector' in 1988 by authors Chris Harris and Mike Stephens.
The algorithm uses the sum of the changes in the intensity of adjacent pixels to find flat areas, edges, and corners. The first step in finding these areas is to expand the formula for the above sum into a Taylor series and then calculate a value based on the determinant and trace of the gradient matrix, which will directly indicate the classification of the area.
where $I_x$ represents the partial derivative of the image intensity $I(x,y)$ with respect to the x-coordinate. 3. Calculation of the final value:
$$R = \det(M) - k\,(\operatorname{trace}(M))^2$$ $$\det(M) = \lambda_1 \lambda_2$$ $$\operatorname{trace}(M) = \lambda_1 + \lambda_2$$ Determining whether a given area is a corner, an edge, or a flat area is made in accordance with the following:
# Harris corner detection demo on the chessboard image.
img_blocks = cv2.imread("chessboard.png", cv2.IMREAD_COLOR)
img_blocks = cv2.resize(img_blocks, None, fx=0.5, fy=0.5)
# cornerHarris requires a single-channel float32 image.
img_blocks_gray = cv2.cvtColor(img_blocks, cv2.COLOR_BGR2GRAY)
img_blocks_gray = img_blocks_gray.astype(np.float32)
# Arguments: blockSize=3 (neighbourhood), ksize=3 (Sobel aperture), k=0.04
# (Harris free parameter in R = det(M) - k*trace(M)^2).
dst = cv2.cornerHarris(img_blocks_gray, 3, 3, 0.04)
# result is dilated for marking the corners, not important
dst = cv2.dilate(dst, None)
# Threshold for an optimal value, it may vary depending on the image.
img_blocks[dst > 0.01 * dst.max()] = [0, 0, 255]
imshow(img_blocks)
The author of the algorithm is D.Lowe, who presented it for the first time in the work Distinctive Image Features from Scale-Invariant Keypoints in 2004.
It is an algorithm that determines key points and their descriptors based on gradients in their close surroundings. The next steps of the algorithm are as follows:
The input image is scaled multiple times (x1, x0.5, x0.25, x0.125), and then Gaussian filters of several sizes (e.g. 2, 4, 6, 8) are applied at each scale. Thus, by considering different scales when finding keypoints, the keypoints found will be scale invariant.
The difference operation is performed between neighboring Gaussians in a given scale (directly on the image intensities).
Each pixel is compared with its surroundings (eg 8 neighboring pixels - 3x3) and it is checked if a given pixel is an extreme value.
Similarly, the same pixel is compared with the pixels on a lower and higher scale (scale = Gaussian filter size).
The extreme end values are key point candidates. To select key points, an additional filtering operation is performed based on the contrast of the pixel's surroundings.
For key points, their neighboring regions are chosen, and gradients are computed for each pixel within these regions. These gradients are used to calculate their magnitude and orientation. Then an orientation histogram with 36 bins covering 360 degrees is created.
The resultant histogram invariably exhibits at least one prominent peak, which is subsequently designated as the keypoint's orientation. If there exist multiple peaks surpassing 80% of the maximum value, a new keypoint copy is generated for each of them, with each copy associated with its respective primary orientation.
The final stage involves the generation of a keypoint descriptor. To achieve this, the surrounding area is divided into subregions, within which both magnitude and orientation are computed. The keypoint's orientation is subtracted from each calculated orientation. Based on these values, an orientation histogram is constructed using 8 bins. For the illustration above, we have 4 mini-regions and each will have 8 values. Ultimately, the descriptor will have a size of 32 (each key point will have 32 values).
# SIFT demo: detect keypoints and compute their 128-dimensional descriptors.
img_clevr = cv2.imread("clevr.jpg", cv2.IMREAD_COLOR)
img_clevr = cv2.resize(img_clevr, None, fx=0.5, fy=0.5)
img_clevr_gray = cv2.cvtColor(img_clevr, cv2.COLOR_BGR2GRAY)
sift = cv2.SIFT_create()
kp, desc = sift.detectAndCompute(img_clevr_gray, None)
# Rich keypoints: circle radius shows scale, line shows orientation.
img_clevr = cv2.drawKeypoints(
img_clevr, kp, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
)
imshow(img_clevr)
print(desc.shape)
print(desc[0])
(123, 128) [ 51. 10. 1. 25. 50. 1. 0. 17. 16. 35. 77. 151. 150. 9. 0. 0. 131. 145. 65. 17. 6. 1. 0. 4. 19. 10. 1. 1. 4. 6. 2. 4. 10. 33. 27. 20. 78. 5. 0. 7. 52. 11. 6. 38. 151. 65. 8. 12. 151. 49. 3. 3. 33. 11. 7. 57. 59. 5. 0. 0. 0. 6. 5. 11. 0. 56. 49. 11. 22. 4. 0. 0. 10. 12. 12. 12. 127. 110. 73. 45. 116. 4. 0. 0. 8. 18. 55. 151. 20. 1. 0. 0. 0. 0. 0. 8. 0. 24. 22. 0. 0. 0. 0. 0. 2. 46. 20. 0. 1. 5. 9. 6. 4. 6. 1. 0. 0. 1. 6. 13. 0. 0. 0. 0. 0. 0. 0. 0.]
The algorithm proposed in the research paper "BRIEF: Binary Robust Independent Elementary Features" by the authors Michael Calonder, Vincent Lepetit, Christoph Strecha, and Pascal Fua.
The algorithm is used to calculate descriptors for a given pixel and its surroundings and does not include the stage of key point detection, so it must always be applied to known (previously discovered) key points.
The algorithm is as follows:
# Example: render a 3x3 index grid with the notebook's h() helper, to show
# how patch positions 0..8 are numbered for the BRIEF sampling below.
h([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
| 0 | 1 | 2 |
| 3 | 4 | 5 |
| 6 | 7 | 8 |
# BRIEF-style sampling: pick n random pairs of positions within a 3x3 (9-pixel) patch.
n = 10
# all combinations
p = list(itertools.combinations(range(9), 2))
# only n random combinations
p = random.sample(p, n)
print(p)
[(2, 4), (4, 5), (0, 5), (1, 2), (3, 8), (7, 8), (2, 5), (4, 7), (5, 7), (0, 4)]
# A toy 3x3 patch used to demonstrate the binary pairwise test below.
x = np.array([[255, 100, 0], [20, 0, 100], [10, 255, 10]])
h(x)
| 255 | 100 | 0 |
| 20 | 0 | 100 |
| 10 | 255 | 10 |
# Flatten the patch and read out the pixel values at each sampled pair of positions.
x_flat = np.reshape(x, -1)
pairs = [(x_flat[pi[0]], x_flat[pi[1]]) for pi in p]
print("The values of all par:")
print(pairs)
# NOTE(review): canonical BRIEF compares intensities with "<" (is the first
# pixel darker than the second?); this demo tests equality instead — confirm
# whether that is intentional. Also note the comprehension variable x shadows
# the patch array x defined above.
bin_vals = [x == y for x, y in pairs]
print("\nThe compliance of the pixel values of par of neighbours:")
print(bin_vals)
brief_desc = np.array(bin_vals, np.int32)
print("\nFinal brief descriptor:")
print(brief_desc)
The values of all par: [(0, 0), (0, 100), (255, 100), (100, 0), (20, 10), (255, 10), (0, 100), (0, 255), (100, 255), (255, 0)] The compliance of the pixel values of par of neighbours: [True, False, False, False, False, False, False, False, False, False] Final brief descriptor: [1 0 0 0 0 0 0 0 0 0]
# %pip install opencv-python==4.10.0 opencv-contrib-python==4.10.0
# %pip freeze > asdad.txr
# %pip install --user opencv-python-headless==4.10.0.82
# %pip install opencv-contrib-python
# BRIEF demo: SIFT detects the keypoints, BRIEF computes binary descriptors.
# NOTE(review): cv2.xfeatures2d is provided by opencv-contrib-python; with
# plain opencv-python the constructor raises AttributeError (see the
# traceback recorded below).
img_clevr = cv2.imread("clevr.jpg", cv2.IMREAD_COLOR)
img_clevr = cv2.resize(img_clevr, None, fx=0.5, fy=0.5)
img_clevr_gray = cv2.cvtColor(img_clevr, cv2.COLOR_BGR2GRAY)
sift = cv2.SIFT_create()
# 32 bytes = 256 random pixel-pair comparisons per keypoint.
brief = cv2.xfeatures2d.BriefDescriptorExtractor_create(bytes=32)
kp = sift.detect(img_clevr_gray, None)
kp, desc = brief.compute(img_clevr_gray, kp)
img_clevr = cv2.drawKeypoints(img_clevr, kp, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
imshow(img_clevr)
print("\nThe size of the descriptor in bytes:", brief.descriptorSize())
print("The number of random pairs of comparisons:", brief.descriptorSize() * 8)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In [228], line 6 3 img_clevr_gray = cv2.cvtColor(img_clevr, cv2.COLOR_BGR2GRAY) 5 sift = cv2.SIFT_create() ----> 6 brief = cv2.xfeatures2d.BriefDescriptorExtractor_create(bytes=32) 7 kp = sift.detect(img_clevr_gray, None) 8 kp, desc = brief.compute(img_clevr_gray, kp) AttributeError: module 'cv2' has no attribute 'xfeatures2d'
The OpenCV implementation operates on bits, but the data (due to the specificity of Python) are returned in the form of bytes. This means that if when creating the descriptor we choose its size as 32 bytes, the BRIEF will look for 256 pairs for each pixel neighborhood.
The result will be returned in bytes (which makes no difference, because the descriptors are compared with Hamming distance).
# NOTE(review): since the BRIEF cell above failed, desc here still holds the
# SIFT output — the printed shape (123, 128) is the SIFT descriptor size.
print(desc.shape)
print(desc[0])
(123, 128) [ 51. 10. 1. 25. 50. 1. 0. 17. 16. 35. 77. 151. 150. 9. 0. 0. 131. 145. 65. 17. 6. 1. 0. 4. 19. 10. 1. 1. 4. 6. 2. 4. 10. 33. 27. 20. 78. 5. 0. 7. 52. 11. 6. 38. 151. 65. 8. 12. 151. 49. 3. 3. 33. 11. 7. 57. 59. 5. 0. 0. 0. 6. 5. 11. 0. 56. 49. 11. 22. 4. 0. 0. 10. 12. 12. 12. 127. 110. 73. 45. 116. 4. 0. 0. 8. 18. 55. 151. 20. 1. 0. 0. 0. 0. 0. 8. 0. 24. 22. 0. 0. 0. 0. 0. 2. 46. 20. 0. 1. 5. 9. 6. 4. 6. 1. 0. 0. 1. 6. 13. 0. 0. 0. 0. 0. 0. 0. 0.]
Algorithm for corner detection by means of a simple and quick test of the pixel's surroundings. Proposed in the paper 'Machine learning for high-speed corner detection' by Edward Rosten and Tom Drummond.
This approach is characterized by a very short duration of action and a relatively high effectiveness of action.
The algorithm performs the following operations:
1. The intensity of the next pixel is taken ($I(x, y)$) and the cut-off threshold ($\lambda_{x,y}$) is selected, based on the intensity taken,
# FAST corner detector demo (produces keypoints only, no descriptors).
img_clevr = cv2.imread("clevr.jpg", cv2.IMREAD_COLOR)
img_clevr = cv2.resize(img_clevr, None, fx=0.5, fy=0.5)
img_clevr_gray = cv2.cvtColor(img_clevr, cv2.COLOR_BGR2GRAY)
fast = cv2.FastFeatureDetector_create()
kp = fast.detect(img_clevr_gray, None)
img_clevr = cv2.drawKeypoints(img_clevr, kp, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
imshow(img_clevr)
An algorithm that combines the approach proposed by FAST and BRIEF. Presented in ORB: An efficient alternative to SIFT or SURF by Ethan Rublee, Vincent Rabaud, Kurt Konolige and Gary R. Bradski.
# ORB demo: FAST-based keypoints plus BRIEF-like binary descriptors
# (32 bytes per keypoint, as the printed shape below shows).
img_clevr = cv2.imread("clevr.jpg", cv2.IMREAD_COLOR)
img_clevr = cv2.resize(img_clevr, None, fx=0.5, fy=0.5)
img_clevr_gray = cv2.cvtColor(img_clevr, cv2.COLOR_BGR2GRAY)
orb = cv2.ORB_create()
kp, desc = orb.detectAndCompute(img_clevr_gray, None)
img_clevr = cv2.drawKeypoints(img_clevr, kp, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
imshow(img_clevr)
print(desc.shape)
print(desc[0])
(465, 32) [ 50 77 98 190 86 128 238 246 212 0 216 66 49 203 177 64 120 234 19 203 200 187 6 255 48 36 170 136 61 187 112 168]
The matching of key points is done on the basis of their descriptors. OpenCV includes an object implementation to find both best and k-best matches.
In order to obtain correct results, the appropriate descriptor similarity function should be used. For descriptors that operate in the Euclidean space, this may be the normal Euclidean distance. For descriptors such as BRISK, which produce binary descriptors, the Hamming distance would be a better proposition.
Let's load two sample images that show the same object from different angles:
# Load two views of the same planar scene for the matching experiments,
# then convert both to grayscale for the detectors/descriptors.
img_graf1 = cv2.imread("graf1.png", cv2.IMREAD_COLOR)
img_graf1 = cv2.resize(img_graf1, None, fx=0.5, fy=0.5)
img_graf2 = cv2.imread("graf2.png", cv2.IMREAD_COLOR)
img_graf2 = cv2.resize(img_graf2, None, fx=0.5, fy=0.5)
imshow(np.concatenate([img_graf1, img_graf2], 1))
img_graf1_gray = cv2.cvtColor(img_graf1, cv2.COLOR_BGR2GRAY)
img_graf2_gray = cv2.cvtColor(img_graf2, cv2.COLOR_BGR2GRAY)
imshow(np.concatenate([img_graf1_gray, img_graf2_gray], 1))
Then, for their representation in the Grayscale color space, let's find the key points and their descriptors using the algorithms below.
# Instantiate the detectors/descriptors used below.
sift = cv2.SIFT_create() # kp + desc
orb = cv2.ORB_create() # kp + desc
fast = cv2.FastFeatureDetector_create() # only kp
# NOTE(review): the two xfeatures2d constructors below need
# opencv-contrib-python; without it they raise AttributeError (see the
# traceback recorded below), leaving star and brief undefined.
star = cv2.xfeatures2d.StarDetector_create() # only kp
brief = cv2.xfeatures2d.BriefDescriptorExtractor_create() # only desc
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In [52], line 4 2 orb = cv2.ORB_create() # kp + desc 3 fast = cv2.FastFeatureDetector_create() # only kp ----> 4 star = cv2.xfeatures2d.StarDetector_create() # only kp 5 brief = cv2.xfeatures2d.BriefDescriptorExtractor_create() AttributeError: module 'cv2' has no attribute 'xfeatures2d'
# Detect keypoints (and descriptors where supported) on both graf images.
sift_kp1, sift_desc1 = sift.detectAndCompute(img_graf1_gray, None)
sift_kp2, sift_desc2 = sift.detectAndCompute(img_graf2_gray, None)
orb_kp1, orb_desc1 = orb.detectAndCompute(img_graf1_gray, None)
orb_kp2, orb_desc2 = orb.detectAndCompute(img_graf2_gray, None)
fast_kp1 = fast.detect(img_graf1_gray, None)
fast_kp2 = fast.detect(img_graf2_gray, None)
# NOTE(review): star is undefined because its construction failed above, so
# the two lines below raise NameError (see the traceback recorded below).
star_kp1 = star.detect(img_graf1_gray, None)
star_kp2 = star.detect(img_graf2_gray, None)
print("\nSIFT:")
print("Number of key points in the first image:", len(sift_kp1))
print("Number of key points in the second image:", len(sift_kp2))
print("\nORB:")
print("Number of key points in the first image:", len(orb_kp1))
print("Number of key points in the second image:", len(orb_kp2))
print("\nFAST:")
print("Number of key points in the first image:", len(fast_kp1))
print("Number of key points in the second image:", len(fast_kp2))
print("\nSTAR:")
print("Number of key points in the first image:", len(star_kp1))
print("Number of key points in the second image:", len(star_kp2))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [53], line 10 7 fast_kp1 = fast.detect(img_graf1_gray, None) 8 fast_kp2 = fast.detect(img_graf2_gray, None) ---> 10 star_kp1 = star.detect(img_graf1_gray, None) 11 star_kp2 = star.detect(img_graf2_gray, None) 13 print("\nSIFT:") NameError: name 'star' is not defined
def show_kp(kp1, kp2):
    """Draw the two keypoint sets on the graf images and show them side by side."""
    left = cv2.drawKeypoints(img_graf1, kp1, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
    right = cv2.drawKeypoints(img_graf2, kp2, None, flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT)
    imshow(np.concatenate([left, right], 1))
# Visualise the keypoints found by each detector.
print("\nSIFT Keypoints:\n")
show_kp(sift_kp1, sift_kp2)
print("\nORB Keypoints:\n")
show_kp(orb_kp1, orb_kp2)
print("\nFAST Keypoints:\n")
show_kp(fast_kp1, fast_kp2)
print("\nSTAR Keypoints:\n")
# NOTE(review): star_kp1/star_kp2 were never defined (the STAR detector failed
# to build), so this call raises NameError (see the traceback recorded below).
show_kp(star_kp1, star_kp2)
SIFT Keypoints:
ORB Keypoints:
FAST Keypoints:
STAR Keypoints:
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [55], line 11 8 show_kp(fast_kp1, fast_kp2) 10 print("\nSTAR Keypoints:\n") ---> 11 show_kp(star_kp1, star_kp2) NameError: name 'star_kp1' is not defined
Additionally, for the FAST and STAR key points, let's count the BRIEF descriptors.
# Compute BRIEF descriptors for the FAST and STAR keypoints.
# NOTE(review): fails because brief was never created (opencv-contrib missing),
# so all four lines raise NameError (see the traceback recorded below).
_, fast_brief_desc1 = brief.compute(img_graf1_gray, fast_kp1)
_, fast_brief_desc2 = brief.compute(img_graf2_gray, fast_kp2)
_, star_brief_desc1 = brief.compute(img_graf1_gray, star_kp1)
_, star_brief_desc2 = brief.compute(img_graf2_gray, star_kp2)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [56], line 1 ----> 1 _, fast_brief_desc1 = brief.compute(img_graf1_gray, fast_kp1) 2 _, fast_brief_desc2 = brief.compute(img_graf2_gray, fast_kp2) 4 _, star_brief_desc1 = brief.compute(img_graf1_gray, star_kp1) NameError: name 'brief' is not defined
For the above generated data, we will check the best fit as well as the k-nearest matches, based on Euclidean and Hamming distance.
# Brute-force matchers: L2 distance for float descriptors (SIFT),
# Hamming distance for binary descriptors (ORB/BRIEF).
bf_l2 = cv2.BFMatcher(cv2.NORM_L2)
bf_hamming = cv2.BFMatcher(cv2.NORM_HAMMING)
def show_matches(bf, kp1, desc1, kp2, desc2, num_matches=30):
    """Find the best matches, sort them by distance, and draw the top num_matches."""
    found = sorted(bf.match(desc1, desc2), key=lambda m: m.distance)
    canvas = cv2.drawMatches(
        img_graf1,
        kp1,
        img_graf2,
        kp2,
        found[:num_matches],
        None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )
    imshow(canvas)
# Best matches (sorted by distance) for each descriptor type.
# SIFT keypoints
show_matches(bf_l2, sift_kp1, sift_desc1, sift_kp2, sift_desc2, 50)
# ORB keypoints
show_matches(bf_l2, orb_kp1, orb_desc1, orb_kp2, orb_desc2, 50)
show_matches(bf_hamming, orb_kp1, orb_desc1, orb_kp2, orb_desc2, 50)
# FAST keypoints
# NOTE(review): fast_brief_desc1/2 are undefined (BRIEF failed to build), so
# the two calls below raise NameError (see the traceback recorded below).
show_matches(bf_l2, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50)
show_matches(bf_hamming, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [65], line 2 1 # FAST keypoints ----> 2 show_matches(bf_l2, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50) 3 show_matches(bf_hamming, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50) NameError: name 'fast_brief_desc1' is not defined
# STAR keypoints
# NOTE(review): star_kp1/star_brief_desc1 are undefined (STAR and BRIEF failed
# to build), so both calls raise NameError (see the traceback recorded below).
show_matches(bf_l2, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50)
show_matches(bf_hamming, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [66], line 2 1 # STAR keypoints ----> 2 show_matches(bf_l2, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50) 3 show_matches(bf_hamming, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50) NameError: name 'star_kp1' is not defined
def show_knn_matches(bf, kp1, desc1, kp2, desc2, num_matches=30, k=2):
    """Match desc1 against desc2 with k-NN, ratio-filter, and draw the result.

    Applies Lowe's ratio test (best match must be < 0.75x the distance of the
    second-best) before drawing up to `num_matches` matches between the
    module-level `img_graf1` / `img_graf2`, displayed via `imshow`.
    """
    knn = bf.knnMatch(desc1, desc2, k=k)
    # Ratio test as described in the SIFT article. knnMatch can return fewer
    # than 2 neighbours per query (small descriptor sets, or k=1), so guard
    # the unpacking instead of crashing with ValueError.
    good = [
        [pair[0]]
        for pair in knn
        if len(pair) >= 2 and pair[0].distance < 0.75 * pair[1].distance
    ]
    canvas = cv2.drawMatchesKnn(
        img_graf1,
        kp1,
        img_graf2,
        kp2,
        good[:num_matches],
        None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )
    imshow(canvas)
# SIFT keypoints — k-NN matching with the ratio test, L2 matcher
show_knn_matches(bf_l2, sift_kp1, sift_desc1, sift_kp2, sift_desc2, 100)
# ORB keypoints — shown under both the L2 and the Hamming matcher
show_knn_matches(bf_l2, orb_kp1, orb_desc1, orb_kp2, orb_desc2, 50)
show_knn_matches(bf_hamming, orb_kp1, orb_desc1, orb_kp2, orb_desc2, 50)
# FAST keypoints with BRIEF descriptors
# NOTE(review): fast_brief_desc1/2 come from the failed `brief.compute` cell,
# so these calls raise NameError until that cell succeeds.
show_knn_matches(bf_l2, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50)
show_knn_matches(bf_hamming, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [70], line 2 1 # FAST keypoints ----> 2 show_knn_matches(bf_l2, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50) 3 show_knn_matches(bf_hamming, fast_kp1, fast_brief_desc1, fast_kp2, fast_brief_desc2, 50) NameError: name 'fast_brief_desc1' is not defined
# STAR keypoints with BRIEF descriptors, under both norms
# NOTE(review): star_kp1/star_brief_desc1 depend on the failed `brief` cell,
# so these calls raise NameError until that cell succeeds.
show_knn_matches(bf_l2, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50)
show_knn_matches(bf_hamming, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In [71], line 2 1 # STAR keypoints ----> 2 show_knn_matches(bf_l2, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50) 3 show_knn_matches(bf_hamming, star_kp1, star_brief_desc1, star_kp2, star_brief_desc2, 50) NameError: name 'star_kp1' is not defined
Implement an algorithm that finds the images most similar to a query image. The algorithm should work with any keypoint detector (an object with a keypoint-detection function) and any descriptor (an object that returns a feature vector for a given keypoint).
Use the Microsoft Cats and Dogs dataset, available online.
Sub-items to be performed:
import cv2
import glob
import random
import numpy as np
import matplotlib.pyplot as plt
def load_images():
    """Build a 20-image gallery (10 cats + 10 dogs) plus one disjoint query path.

    Returns:
        (gallery_paths, query_path): the gallery is a list of 20 file paths;
        the query is a single path drawn from the remaining images.
    """
    cats = glob.glob('animals/PetImages/Cat/*.jpg')
    dogs = glob.glob('animals/PetImages/Dog/*.jpg')
    # 10 random images of each class form the search gallery
    gallery = random.sample(cats, 10) + random.sample(dogs, 10)
    # the query is picked from every image NOT already in the gallery
    leftovers = list(set(cats + dogs) - set(gallery))
    return gallery, random.choice(leftovers)
# animals, query_image = load_images()
def read_image(image_path, target_size=(256, 256)):
    """Load the image as grayscale, resized to `target_size`.

    Returns:
        (image_path, pixels): the original path paired with the resized array.
    """
    grayscale = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    resized = cv2.resize(grayscale, target_size)
    return image_path, resized
# images = [read_image(image_path) for image_path in animals]
# query_image = read_image(query_image)
def display_image(image_path, target_size=(256, 256)):
    """Load the image in RGB colour, resized to `target_size`, for plotting."""
    bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, target_size)
def compute_similarity(image1, image2, detector, descriptor):
    """Score how similar two images are: mean match distance (lower = closer).

    Args:
        image1, image2: grayscale image arrays.
        detector: object with detectAndCompute(img, mask) -> (keypoints, descriptors).
        descriptor: unused — the detector computes descriptors itself; kept
            for interface compatibility with find_similar_images.

    Returns:
        Mean distance over cross-checked brute-force matches, or +inf when
        either image yields no descriptors or no match survives cross-checking.
    """
    kp1, des1 = detector.detectAndCompute(image1, None)
    kp2, des2 = detector.detectAndCompute(image2, None)
    # detectAndCompute returns None descriptors for featureless images;
    # bf.match would raise on None, so treat that as "no similarity".
    if des1 is None or des2 is None:
        return float('inf')
    # Binary descriptors (e.g. ORB, dtype uint8) should be compared with
    # Hamming distance; float descriptors (e.g. SIFT) with L2.
    norm = cv2.NORM_HAMMING if des1.dtype == np.uint8 else cv2.NORM_L2
    bf = cv2.BFMatcher(norm, crossCheck=True)
    matches = bf.match(des1, des2)
    distances = [m.distance for m in matches]
    return sum(distances) / len(distances) if distances else float('inf')
def find_similar_images(dataset, query_image, detector, descriptor, top_n=5):
    """Rank gallery images by similarity to the query; return the closest top_n.

    Args:
        dataset: iterable of (path, pixels) pairs.
        query_image: (path, pixels) pair for the query.
        detector, descriptor: passed through to compute_similarity.
        top_n: how many best matches to return.

    Returns:
        List of (path, score) pairs sorted by ascending score (best first).
    """
    _, query_pixels = query_image
    scored = [
        (path, compute_similarity(query_pixels, pixels, detector, descriptor))
        for path, pixels in dataset
    ]
    scored.sort(key=lambda entry: entry[1])
    return scored[:top_n]
def _show_query_and_results(query_path, results):
    """Plot the query image followed by its ranked matches in a single row."""
    row = [display_image(query_path)]
    for match_path, _score in results:
        row.append(display_image(match_path))
    plt.figure(figsize=(20, 10))
    plt.imshow(np.concatenate(row, 1))
    plt.show()

animals, query_image_p = load_images()  # gallery paths + held-out query path
dataset = [read_image(image_path) for image_path in animals]
query_image = read_image(query_image_p)

sift = cv2.SIFT_create()
orb = cv2.ORB_create()

print("Using SIFT:")
sift_results = find_similar_images(dataset, query_image, sift, sift)
_show_query_and_results(query_image_p, sift_results)

print("\nUsing ORB:")
orb_results = find_similar_images(dataset, query_image, orb, orb)
_show_query_and_results(query_image_p, orb_results)

# Crude accuracy check: count how many retrieved images come from the same
# class folder ("Cat"/"Dog") as the query path.
query_class = "Cat" if "Cat" in query_image_p else "Dog"
sift_correct = sum(1 for img_path, _ in sift_results if query_class in img_path)
orb_correct = sum(1 for img_path, _ in orb_results if query_class in img_path)
print(f"\nSIFT: {sift_correct}/5 images match the query class.")
print(f"ORB: {orb_correct}/5 images match the query class.")
Using SIFT:
Using ORB:
SIFT: 4/5 images match the query class. ORB: 5/5 images match the query class.